source("../functions.R")
tol12qualitative=c("#332288", "#6699CC", "#88CCEE", "#44AA99", "#117733", "#999933", "#DDCC77", "#661100", "#CC6677", "#AA4466", "#882255", "#AA4499")
if (!file.exists("output")) {
system("mkdir output")
}
parallel = TRUE
ncores = 10
if (!require(DCARS)) {
library(devtools)
devtools::install_github("shazanfar/DCARS")
}
library(DCARS)
library(ggplot2)
library(gplots)
library(SingleCellExperiment)
library(scater)
library(scran)
library(reshape)
library(scattermore)
library(dynamicTreeCut)
library(ComplexHeatmap)
library(GO.db)
library(org.Mm.eg.db)
library(stringr)
library(matrixStats)
library(ggforce)
library(patchwork)
library(ggpubr)
library(cowplot)
library(parallel)
library(GGally)
library(corrplot)
library(UpSetR)
if (!file.exists("output/counts_coords.RData")) {
# data file URL https://www.spatialresearch.org/wp-content/uploads/2016/07/Rep11_MOB_count_matrix-1.tsv
# might take a minute to download
counts_raw = read.delim(url("https://www.spatialresearch.org/wp-content/uploads/2016/07/Rep11_MOB_count_matrix-1.tsv"), header = TRUE, row.names = 1)
# counts_raw = read.delim(file = "Rep11_MOB_count_matrix-1.tsv", header = TRUE, row.names = 1)
dim(counts_raw)
coords_raw = do.call(rbind, strsplit(rownames(counts_raw), "x"))
# xy plane only
coords = apply(coords_raw, 1:2, as.numeric)
colnames(coords) <- c("x","y")
rownames(coords) <- rownames(counts_raw)
plot(coords[,"x"], coords[,"y"])
counts = t(counts_raw)
sce = SingleCellExperiment(assays = list(counts = counts),
colData = coords)
sce <- scater::normalize(sce)
var.fit <- trendVar(sce, parametric=TRUE, loess.args=list(span=0.3), use.spikes = FALSE)
var.out <- decomposeVar(sce, var.fit)
pdf(file = "output/HVG_selection.pdf", height = 8, width = 8)
plot(var.out$mean, var.out$total, pch=16, cex=0.6, xlab="Mean log-expression",
ylab="Variance of log-expression")
curve(var.fit$trend(x), col="dodgerblue", lwd=2, add=TRUE)
seqvals = seq(min(var.out$mean), max(var.out$mean), length.out = 1000)
peakExp = seqvals[which.max(var.fit$trend(seqvals))]
hvg.out <- var.out[which(var.out$FDR <= 0.05 & var.out$mean > peakExp),]
nrow(hvg.out)
hvg.out <- hvg.out[order(hvg.out$bio, decreasing=TRUE),]
points(var.out$mean[which(var.out$FDR <= 0.05 & var.out$mean > peakExp)],
var.out$total[which(var.out$FDR <= 0.05 & var.out$mean > peakExp)], col="red", pch=16)
abline(v = peakExp, lty = 2, col = "black")
dev.off()
head(hvg.out)
HVG = sort(rownames(hvg.out))
expr = logcounts(sce)
save(counts, coords, sce, HVG, expr, file = "output/counts_coords.RData")
} else {
load("output/counts_coords.RData")
}
W = weightMatrix_nD(coords, span = 0.05)
This takes a long time (approx 2 hours) to run.
#### test for spatial differential expression by using weightedMean
# results in a list of non-spatially DE genes
samepairs = cbind(rownames(expr), rownames(expr))
rownames(samepairs) = samepairs[,1]
if (!file.exists("output/DE_stats_all.Rds")) {
DE_stats_all = DCARSacrossNetwork(expr,
samepairs,
W = W,
weightedConcordanceFunction = weightedMeanMatrixStats,
extractTestStatisticOnly = TRUE,
niter = 1000,
verbose = FALSE)
saveRDS(DE_stats_all, file = "output/DE_stats_all.Rds")
} else {
DE_stats_all = readRDS("output/DE_stats_all.Rds")
}
if (!file.exists("output/DE_sampled_permstats.Rdata")) {
# Does globalCor relate to the null distribution for genes?
set.seed(500)
globalMean = rowMeans(expr)
pairs_sampled = samepairs[DCARS::stratifiedSample(globalMean, length = 100),]
sampled_permstats = DCARSacrossNetwork(expr,
pairs_sampled,
W = W,
weightedConcordanceFunction = weightedMeanMatrixStats,
extractPermutationTestStatistics = TRUE,
niter = 1000,
verbose = TRUE)
save(sampled_permstats, pairs_sampled, globalMean, file = "output/DE_sampled_permstats.Rdata")
} else {
load("output/DE_sampled_permstats.Rdata")
}
plot(globalMean[rownames(pairs_sampled)], unlist(lapply(unlist(sampled_permstats, recursive = FALSE),quantile, 0.99)))
points(globalMean[rownames(pairs_sampled)], unlist(lapply(unlist(sampled_permstats, recursive = FALSE),quantile, 0.95)), col = "red")
points(globalMean[rownames(pairs_sampled)], unlist(lapply(unlist(sampled_permstats, recursive = FALSE),quantile, 0.90)), col = "blue")
# looks like there is a general relationship between global mean and the null distribution, but this does not appear to be a strong enough association to predict null distribution by just globalMean
# will need to run full permutation testing
if (!file.exists("output/DE_n100_permstats.Rds")) {
set.seed(500)
if (!parallel) {
n100_permstats = DCARSacrossNetwork(expr,
samepairs,
W = W,
weightedConcordanceFunction = weightedMeanMatrixStats,
extractPermutationTestStatistics = TRUE,
niter = 100,
verbose = TRUE)
} else {
split_df = split.data.frame(samepairs, rep(1:ncores, length.out = nrow(samepairs)))
names(split_df) <- NULL
n100_permstats = mclapply(split_df, function(s) {
res = DCARSacrossNetwork(expr,
s,
W = W,
weightedConcordanceFunction = weightedMeanMatrixStats,
extractPermutationTestStatistics = TRUE,
niter = 100,
verbose = TRUE)
res = lapply(res, unlist)
names(res) <- rownames(s)
return(res)
})
n100_permstats = unlist(n100_permstats, recursive = FALSE)
n100_permstats <- n100_permstats[rownames(samepairs)]
}
saveRDS(n100_permstats, file = "output/DE_n100_permstats.Rds")
} else {
n100_permstats = readRDS("output/DE_n100_permstats.Rds")
}
n100_pval = sapply(names(DE_stats_all), function(gene){
mean(unlist(n100_permstats[[gene]]) >= DE_stats_all[gene])
})
# recalculate p-value for those with pval < 0.1 with 1000 iter
# aim is to remove these genes at p-value < 0.05 level
if (!file.exists("output/DE_n1000_pval.Rds")) {
set.seed(500)
if (!parallel) {
n1000_pval = DCARSacrossNetwork(expr,
samepairs[n100_pval < 0.1,],
W = W,
weightedConcordanceFunction = weightedMeanMatrixStats,
niter = 1000,
verbose = TRUE)
} else {
split_df = split.data.frame(samepairs[n100_pval < 0.1,], rep(1:ncores, length.out = nrow(samepairs[n100_pval < 0.1,])))
names(split_df) <- NULL
n1000_pval = mclapply(split_df, function(s) {
res = DCARSacrossNetwork(expr,
s,
W = W,
weightedConcordanceFunction = weightedMeanMatrixStats,
niter = 1000,
verbose = TRUE)
res = lapply(res, unlist)
names(res) <- rownames(s)
return(res)
})
n1000_pval = unlist(n1000_pval)
n1000_pval <- n1000_pval[rownames(samepairs[n100_pval<0.1,])]
}
saveRDS(n1000_pval, file = "output/DE_n1000_pval.Rds")
} else {
n1000_pval = readRDS("output/DE_n1000_pval.Rds")
}
DE_pval = n100_pval
DE_pval[names(n1000_pval)] <- n1000_pval
DE_fdr = p.adjust(DE_pval, method = "BH")
nonDEgenes_fdr = names(which(DE_fdr > 0.05))
saveRDS(nonDEgenes_fdr, file = "output/nonDEgenes_fdr.Rds")
nonDEgenes = names(which(DE_pval > 0.05))
saveRDS(nonDEgenes, file = "output/nonDEgenes.Rds")
pairs = t(combn(intersect(HVG, nonDEgenes),2))
rownames(pairs) <- apply(pairs,1,paste0,collapse = "_")
dim(pairs)
## [1] 903 2
if (!file.exists("output/sampled_permstats.Rdata")) {
set.seed(500)
# Does globalCor relate to the null distribution for genes?
globalCor = apply(pairs, 1, function(x) cor(counts[x[1],], counts[x[2],], method = "spearman"))
pairs_sampled = pairs[DCARS::stratifiedSample(globalCor, length = 200),]
if (!parallel) {
sampled_permstats = DCARSacrossNetwork(counts,
pairs_sampled,
W = W,
weightedConcordanceFunction = weightedSpearman,
extractPermutationTestStatistics = TRUE,
niter = 1000,
verbose = TRUE)
} else {
split_df = split.data.frame(pairs_sampled, rep(1:ncores, length.out = nrow(pairs_sampled)))
names(split_df) <- NULL
sampled_permstats = mclapply(split_df, function(s) {
res = DCARSacrossNetwork(counts,
s,
W = W,
weightedConcordanceFunction = weightedSpearman,
extractPermutationTestStatistics = TRUE,
niter = 1000,
verbose = TRUE)
res = lapply(res, unlist)
names(res) <- rownames(s)
return(res)
})
sampled_permstats = unlist(sampled_permstats, recursive = FALSE)
sampled_permstats <- sampled_permstats[rownames(pairs_sampled)]
}
save(sampled_permstats, pairs_sampled, globalCor, file = "output/sampled_permstats.Rdata")
} else {
load("output/sampled_permstats.Rdata")
}
permstatsDF = data.frame(
genepair = rep(names(sampled_permstats), times = unlist(lapply(sampled_permstats, function(x) length(unlist(x))))),
stat = unlist(sampled_permstats)
)
permstatsDF$globalCor = globalCor[as.character(permstatsDF$genepair)]
df_99 = data.frame(
genepair = names(sampled_permstats),
globalCor = globalCor[names(sampled_permstats)],
stat_999 = unlist(lapply(sampled_permstats, function(x) quantile(unlist(x), 0.999))),
stat_99 = unlist(lapply(sampled_permstats, function(x) quantile(unlist(x), 0.99))),
stat_95 = unlist(lapply(sampled_permstats, function(x) quantile(unlist(x), 0.95))),
stat_90 = unlist(lapply(sampled_permstats, function(x) quantile(unlist(x), 0.90)))
)
df_99$fitted_999 = loess(stat_999 ~ globalCor, data = df_99)$fitted
df_99$fitted_99 = loess(stat_99 ~ globalCor, data = df_99)$fitted
df_99$fitted_95 = loess(stat_95 ~ globalCor, data = df_99)$fitted
df_99$fitted_90 = loess(stat_90 ~ globalCor, data = df_99)$fitted
g = ggplot(permstatsDF, aes(x = globalCor, y = stat)) +
theme_classic() +
geom_scattermore() +
geom_point(aes(y = stat_999, colour = "0.999"), data = df_99) +
geom_line(aes(y = fitted_999, colour = "0.999"), data = df_99) +
geom_point(aes(y = stat_99, colour = "0.99"), data = df_99) +
geom_line(aes(y = fitted_99, colour = "0.99"), data = df_99) +
geom_point(aes(y = stat_95, colour = "0.95"), data = df_99) +
geom_line(aes(y = fitted_95, colour = "0.95"), data = df_99) +
geom_point(aes(y = stat_90, colour = "0.90"), data = df_99) +
geom_line(aes(y = fitted_90, colour = "0.90"), data = df_99) +
scale_colour_manual(values = c("0.999" = tol12qualitative[4],
"0.99" = tol12qualitative[3],
"0.95" = tol12qualitative[2],
"0.90" = tol12qualitative[1])) +
labs(colour = "Quantile") +
theme(panel.grid = element_blank()) +
xlab("Global Correlation") +
ylab("Permuted test statistics") +
theme(legend.position = "bottom")
g
ggsave(g, file = "output/stats_globalCor_2d.pdf", height = 8, width = 8)
if (!file.exists("output/2D_p_all.Rdata")) {
# estimate the set of p-values
stats_all = DCARSacrossNetwork(counts,
pairs,
W = W,
weightedConcordanceFunction = weightedSpearman,
extractTestStatisticOnly = TRUE,
niter = 1000,
verbose = TRUE)
wcors_all = t(DCARSacrossNetwork(counts,
pairs,
W = W,
weightedConcordanceFunction = weightedSpearman,
extractWcorSequenceOnly = TRUE,
niter = 1000,
verbose = TRUE))
p_all = estimatePvaluesSpearman(stats_all,
globalCor,
sampled_permstats,
usenperm = TRUE,
nperm = 10000,
plot = FALSE,
maxDist = 2,
verbose = TRUE)
p_all$fdr <- p.adjust(p_all$pval, method = "BH")
save(p_all, stats_all, wcors_all, file = "output/2D_p_all.Rdata")
} else {
load("output/2D_p_all.Rdata")
}
p_all$gene1 = unlist(lapply(strsplit(as.character(p_all$genepair), "_"), "[", 1))
p_all$gene2 = unlist(lapply(strsplit(as.character(p_all$genepair), "_"), "[", 2))
# differentially expressed genes to remove
p_all$spatiallyDE = ifelse(as.character(p_all$gene1) %in% nonDEgenes & as.character(p_all$gene2) %in% nonDEgenes,"No", "DE")
dim(subset(p_all, spatiallyDE == "No"))
## [1] 903 11
nonDEfdr = p_all$pval
nonDEfdr[p_all$spatiallyDE == "DE"] <- NA
nonDEfdr = p.adjust(nonDEfdr, method = "BH")
nonDEfdr[is.na(nonDEfdr)] <- 1
p_all$nonDEfdr = nonDEfdr
getDF = function(gene1, gene2 = NULL) {
if (length(gene1) > 1) {
gene2 = gene1[2]
gene1 = gene1[1]
}
gpair = paste0(gene1,"_",gene2)
if (!gpair %in% rownames(wcors_all)) {
gpair = paste0(gene2,"_",gene1)
}
wcor = wcors_all[gpair,]
df_res = data.frame(x = coords[,"x"],
y = coords[,"y"],
g1 = expr[gene1,],
g2 = expr[gene2,],
wcor = wcor,
W_min = W[which.min(wcor),],
W_max = W[which.max(wcor),])
return(df_res)
}
basicplotFunction = function(gene1, gene2 = NULL) {
require(ggforce)
require(patchwork)
require(ggpubr)
if (length(gene1) > 1) {
gene2 = gene1[2]
gene1 = gene1[1]
}
df_res = getDF(gene1, gene2)
t = theme(legend.key.width = unit(0.5, "inches")) +
theme(plot.title = element_text(size = 20)) +
theme(axis.title = element_text(size = 15))
g_gene1 = ggplot(df_res, aes(x = -x, y = y)) +
geom_point(aes(colour = g1), size = 5) +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text = element_blank()) +
xlab("") +
ylab("") +
ggtitle(gene1) +
labs(colour = "") +
theme(legend.position = "bottom") +
theme(plot.title = element_text(hjust = 0.5, face = "italic")) +
scale_color_viridis_c(breaks = c(0,max(df_res$g1)),
limits = c(0,max(df_res$g1)),
labels = c("Low","High")) +
t +
coord_fixed() +
guides(colour = guide_colourbar(title.position = "top",
title.hjust = 0.5)) +
theme(legend.title=element_text(size=15)) +
labs(colour = "Expression") +
NULL
g_gene2 = ggplot(df_res, aes(x = -x, y = y)) +
geom_point(aes(colour = g2), size = 5) +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text = element_blank()) +
xlab("") +
ylab("") +
ggtitle(gene2) +
theme(plot.title = element_text(hjust = 0.5, face = "italic")) +
labs(colour = "") +
theme(legend.position = "bottom") +
scale_color_viridis_c(breaks = c(0,max(df_res$g1)),
limits = c(0,max(df_res$g1)),
labels = c("Low","High")) +
t +
coord_fixed() +
theme(legend.position = "none") +
NULL
g2 = ggplot(df_res, aes(x = -x, y = y, fill = wcor)) +
geom_voronoi_tile(max.radius = 1) +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(axis.text = element_blank()) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(legend.position = "bottom") +
labs(colour = "",fill = "") +
labs(colour = "Local correlation",fill = "Local correlation") +
ylab("") +
xlab("") +
ggtitle("Correlation of both genes") +
scale_alpha_continuous(range = c(0,0.5)) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", limits = c(-1,1)) +
t +
coord_fixed() +
guides(fill = guide_colourbar(title.position = "top",
title.hjust = 0.5)) +
theme(legend.title=element_text(size=15)) +
NULL
g_gene1_leg = as_ggplot(get_legend(g_gene1))
g2_leg = as_ggplot(get_legend(g2))
scater::multiplot(g_gene1 + theme(legend.position = "none") +
theme(plot.margin = margin(10,0,-10,0)),
g_gene2 +
theme(plot.margin = margin(10,0,-10,0)),
g2 + theme(legend.position = "none") +
theme(plot.margin = margin(10,0,-10,0)),
g_gene1_leg,
g2_leg,
layout = matrix(
c(1,1,2,2,3,3,
1,1,2,2,3,3,
1,1,2,2,3,3,
6,4,4,6,5,5), ncol = 6, byrow = TRUE))
}
plotFunction = function(gene1, gene2 = NULL) {
require(ggforce)
require(patchwork)
if (length(gene1) > 1) {
gene2 = gene1[2]
gene1 = gene1[1]
}
df_res = getDF(gene1, gene2)
g_W_min = ggplot(df_res, aes(x = g1, y = g2)) +
geom_point(aes(alpha = W_min, size = W_min), colour = "purple") +
theme_minimal() +
xlab(gene1) +
ylab(gene2) +
ggtitle("Min") +
NULL
g_W_max = ggplot(df_res, aes(x = g1, y = g2)) +
geom_point(aes(alpha = W_max, size = W_max), colour = "orange") +
theme_minimal() +
xlab(gene1) +
ylab(gene2) +
ggtitle("Max") +
NULL
g_xy = ggplot(df_res, aes(x = x, y = y)) +
geom_point(size = 0.1) +
geom_density_2d(data = subset(df_res, W_max != 0), colour = "orange") +
geom_density_2d(data = subset(df_res, W_min != 0), colour = "purple") +
theme_minimal() +
xlab("x coordinate") +
ylab("y coordinate") +
ggtitle("Positions") +
NULL
g_gene1 = ggplot(df_res, aes(x = x, y = y)) +
geom_point(aes(colour = g1), size = 5) +
theme_minimal() +
ggtitle(gene1) +
scale_color_gradient2(low = "black", mid = "yellow", high = "red", midpoint = 2) +
NULL
g_gene2 = ggplot(df_res, aes(x = x, y = y)) +
geom_point(aes(colour = g2), size = 5) +
theme_minimal() +
ggtitle(gene2) +
scale_color_gradient2(low = "black", mid = "yellow", high = "red", midpoint = 2) +
NULL
g2 = ggplot(df_res, aes(x = x, y = y, fill = wcor)) +
geom_voronoi_tile(max.radius = 1) +
geom_point(size = 1, colour = "black") +
theme_minimal() +
scale_alpha_continuous(range = c(0,0.5)) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", limits = c(-1,1)) +
NULL
return(g_gene1 + g_gene2 + g2 +
g_W_min + g_W_max + g_xy + plot_layout(ncol = 3, nrow = 2, byrow = TRUE))
}
FDR_level = 0.2
wcorsSig = wcors_all[p_all$nonDEfdr < FDR_level,]
pairsSig = pairs[p_all$nonDEfdr < FDR_level,]
dim(wcorsSig)
## [1] 167 262
# basic plot functions of sig pairs
apply(pairsSig,1, function(p) {
#print(p)
if (!file.exists("output/basicplots")) {
system("mkdir output/basicplots")
}
pdf(paste0("output/basicplots/", p[1], "_", p[2], ".pdf"),
height = 4.5, width = 12,onefile=FALSE)
basicplotFunction(p)
dev.off()
})
## Abat_Arrb1 Abat_Cst3 Abat_Mlc1 Abat_Myo10
## 2 2 2 2
## Abat_Slc1a3 Abat_Tiam1 Actb_Gucy1b3 Actb_Itm2c
## 2 2 2 2
## Actb_Myo10 Actb_Smim13 Actb_Sp8 Actb_Tiam1
## 2 2 2 2
## Actb_Vdac3 Arrb1_Cnp Arrb1_Cst3 Arrb1_Dnm3
## 2 2 2 2
## Arrb1_Mtor Arrb1_Prune2 Arrb1_Smim13 Arrb1_Sp8
## 2 2 2 2
## Arrb1_Tmem47 Arrb1_Uchl1 Atp6v1c1_Calm1 Atp6v1c1_Cst3
## 2 2 2 2
## Atp6v1c1_Luzp2 Atp6v1c1_Mlc1 Atp6v1c1_Myo10 Atp6v1c1_Plekha1
## 2 2 2 2
## Atp6v1c1_Sp8 Atp6v1c1_Tiam1 Atp6v1c1_Tmsb4x Atrnl1_Ccng1
## 2 2 2 2
## Atrnl1_Gm15421 Atrnl1_Myo10 Bai1_Ccng1 Bai1_Cst3
## 2 2 2 2
## Bai1_Gucy1b3 Bai1_Slc1a3 Bai1_Tiam1 Calm1_Cst3
## 2 2 2 2
## Calm1_Dnm1l Calm1_Gucy1b3 Calm1_Plekha1 Calm1_Smim13
## 2 2 2 2
## Calm1_Sp8 Calm1_Tiam1 Calm1_Vdac3 Calm1_Wdfy3
## 2 2 2 2
## Ccng1_Cldn11 Ccng1_Erbb2ip Ccng1_Scd2 Ccng1_Smim13
## 2 2 2 2
## Cldn11_Cnp Cldn11_Cst3 Cldn11_Gucy1b3 Cldn11_Ildr2
## 2 2 2 2
## Cldn11_Itm2c Cldn11_Scd2 Cldn11_Tuba1a Cldn11_Vdac3
## 2 2 2 2
## Cnp_Itm2c Cnp_Prune2 Cnp_Tmsb4x Cst3_Gucy1b3
## 2 2 2 2
## Cst3_Impact Cst3_Itm2c Cst3_Luzp2 Cst3_Mtor
## 2 2 2 2
## Cst3_Myo10 Cst3_Prrc2b Cst3_Prune2 Cst3_Smim13
## 2 2 2 2
## Cst3_Sp8 Cst3_Tuba1a Cst3_Uchl1 Cst3_Wdfy3
## 2 2 2 2
## Dnm1l_Dnm3 Dnm1l_Fam63b Dnm1l_Plekha1 Dnm1l_Sp8
## 2 2 2 2
## Dnm1l_Vdac3 Dnm3_Fam63b Dnm3_Tiam1 Gm15421_Ildr2
## 2 2 2 2
## Gm15421_Itm2c Gm15421_Tmsb4x Gucy1b3_Ildr2 Gucy1b3_Mlc1
## 2 2 2 2
## Gucy1b3_Myo10 Gucy1b3_Plekha1 Gucy1b3_Smim13 Gucy1b3_Tiam1
## 2 2 2 2
## Gucy1b3_Tmem47 Gucy1b3_Tmsb4x Gucy1b3_Tuba1a Gucy1b3_Uchl1
## 2 2 2 2
## Igf2_Mtor Igf2_Pmm1 Ildr2_Itm2c Ildr2_Mlc1
## 2 2 2 2
## Ildr2_Plekha1 Ildr2_Tiam1 Ildr2_Tuba1a Impact_Mlc1
## 2 2 2 2
## Impact_Myo10 Impact_Ntng1 Impact_Plekha1 Impact_Tiam1
## 2 2 2 2
## Impact_Tmem47 Itm2c_Myo10 Itm2c_Pmm1 Itm2c_Prrc2b
## 2 2 2 2
## Itm2c_Smim13 Itm2c_Sp8 Itm2c_Tiam1 Itm2c_Tuba1a
## 2 2 2 2
## Itm2c_Uchl1 Itm2c_Vdac3 Luzp2_Uchl1 Mlc1_Mtor
## 2 2 2 2
## Mlc1_Myo10 Mlc1_Prune2 Mlc1_Slc1a3 Mlc1_Uchl1
## 2 2 2 2
## Mtor_Myo10 Mtor_Slc1a3 Mtor_Tmem47 Mtor_Vdac3
## 2 2 2 2
## Mtor_Wdfy3 Myo10_Prune2 Myo10_Scd2 Myo10_Slc1a3
## 2 2 2 2
## Myo10_Sp8 Myo10_Tiam1 Myo10_Uchl1 Ntng1_Tiam1
## 2 2 2 2
## Ntng1_Utrn Plekha1_Tiam1 Plekha1_Tmsb4x Plekha1_Vdac3
## 2 2 2 2
## Prrc2b_Sp8 Prrc2b_Tiam1 Prrc2b_Vdac3 Prune2_Tiam1
## 2 2 2 2
## Prune2_Tuba1a Prune2_Wdfy3 Scd2_Smim13 Scd2_Sp8
## 2 2 2 2
## Smim13_Sp8 Smim13_Tmem47 Smim13_Tmsb4x Smim13_Tuba1a
## 2 2 2 2
## Smim13_Vdac3 Sp8_Tmsb4x Sp8_Tuba1a Sp8_Uchl1
## 2 2 2 2
## Sp8_Vdac3 Tiam1_Tmsb4x Tiam1_Tuba1a Tiam1_Vdac3
## 2 2 2 2
## Tiam1_Wdfy3 Tmsb4x_Tuba1a Tmsb4x_Vdac3 Tuba1a_Uchl1
## 2 2 2 2
## Tuba1a_Vdac3 Uchl1_Vdac3 Vdac3_Wdfy3
## 2 2 2
cellsCut = cutree(hclust(cordist(t(wcorsSig))), 20)
table(cellsCut)
## cellsCut
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 15 16 19 10 7 12 18 4 6 27 6 9 36 10 12 8 12 8 7 20
sort(table(cellsCut))
## cellsCut
## 8 9 11 5 19 16 18 12 4 14 6 15 17 1 2 7 3 20 10 13
## 4 6 6 7 7 8 8 9 10 10 12 12 12 15 16 18 19 20 27 36
df_res = getDF(pairs[1,])
df_res$cellsCut <- factor(cellsCut)
ggplot(df_res, aes(x = x, y = y, fill = cellsCut, colour = cellsCut)) +
geom_point(size = 7, shape = 21, stroke = 1.5, colour = "black") +
theme_minimal() +
scale_alpha_continuous(range = c(0,0.5)) +
NULL
ggsave(file = "output/cellsCluster_spatial.pdf", height = 8, width = 10)
hc = hclust(dist(wcorsSig, method = "maximum"), method = "complete")
genepairsClustDynamic = cutreeDynamic(
hc,
minClusterSize = 10,
method = "tree",
deepSplit = TRUE,
useMedoids = FALSE
)
kk = length(unique(genepairsClustDynamic))-1
kk
## [1] 9
genepairsClust = cutree(hc, k = kk)
plot(hc)
table(genepairsClust)
## genepairsClust
## 1 2 3 4 5 6 7 8 9
## 18 35 10 16 19 24 33 8 4
sort(table(genepairsClust))
## genepairsClust
## 9 8 3 4 1 5 6 7 2
## 4 8 10 16 18 19 24 33 35
p_sig = cbind(p_all[names(genepairsClust),], cluster = genepairsClust)
saveRDS(p_sig, file = "output/p_sig.Rds")
write.table(as.matrix(sort_df(p_sig, c("cluster","genepair"))),
file = "output/sig_genepairs.tsv", row.names = FALSE,
col.names = TRUE, quote = FALSE, sep = "\t")
write.table(as.matrix(genepairsClust), file = "output/genepairsClust.tsv", row.names = TRUE,
col.names = FALSE, quote = FALSE, sep = "\t")
genepairs_split = lapply(split(names(genepairsClust), genepairsClust), function(x) t(do.call(cbind, strsplit(x, "_"))))
genepairs_split_genes = lapply(genepairs_split, function(x) sort(unique(c(x))))
sapply(names(genepairs_split_genes), function(i){
write(genepairs_split_genes[[i]], file = paste0("output/cluster_", i,".txt"))
})
## $`1`
## NULL
##
## $`2`
## NULL
##
## $`3`
## NULL
##
## $`4`
## NULL
##
## $`5`
## NULL
##
## $`6`
## NULL
##
## $`7`
## NULL
##
## $`8`
## NULL
##
## $`9`
## NULL
geneClustMembers = sapply(unique(genepairsClust), function(x)
unique(c(pairsSig[genepairsClust == x,])), simplify = FALSE)
names(geneClustMembers) <- paste0("cluster_", unique(genepairsClust))
saveRDS(geneClustMembers, file = "output/geneClustMembers.Rds")
jacDist_pairs = expand.grid(names(geneClustMembers),names(geneClustMembers))
jacDist_vals = apply(jacDist_pairs,1,function(x){
if (x[1] == x[2]) return(NA)
set1 = geneClustMembers[[x[1]]]
set2 = geneClustMembers[[x[2]]]
return(length(intersect(set1,set2))/length(union(set1,set2)))
})
jacDist = as.matrix(reshape::cast(cbind(jacDist_pairs, jacDist_vals), formula = Var2 ~ Var1, value = "jacDist_vals"))
pdf("output/jacDist_clusters.pdf", height = 8, width = 8)
heatmap.2(jacDist, trace = "n", main = "Jaccard distance of genes within clusters",
density.info = "none",
key.title = "",
key.xlab = "Jaccard distance",
symm = TRUE,
revC = TRUE)
dev.off()
## quartz_off_screen
## 2
meanwcorsSig = apply(wcorsSig, 2, function(x) {
tapply(x, genepairsClust, mean)
})
rownames(meanwcorsSig) <- paste0("cluster_", 1:length(unique(genepairsClust)))
pdf("output/meanGenes_heatmap.pdf", height = 12, width = 24)
heatmap.2(meanwcorsSig, trace = "n", col = colorRampPalette(c("blue","white","red")),
key.title = "",
key.xlab = "Mean weighted correlation",
main = "Mean weighted correlation of clustered genepairs")
dev.off()
## quartz_off_screen
## 2
df_res2 <- getDF(pairs[1,])
df_res2 <- cbind(df_res2, t(meanwcorsSig))
gList_cells <- sapply(rownames(meanwcorsSig), function(name) {
ggplot(df_res2, aes(x = -x, y = y, fill = get(name))) +
geom_voronoi_tile(max.radius = 1) +
geom_point(size = 0.5, colour = "black") +
theme_minimal() +
scale_alpha_continuous(range = c(0,0.5)) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", limits = c(-1,1)) +
ggtitle("") +
theme(legend.position = "none") +
theme(panel.grid = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank()) +
xlab("") + ylab("") +
theme(plot.title = element_text(hjust = 0.5)) +
coord_fixed() +
NULL
}, simplify = FALSE)
gAll = patchwork::wrap_plots(gList_cells, ncol = length(gList_cells)/5, nrow = 5)
gAll
ggsave(gAll, file = "output/genepairsClust_wcors.pdf", height = 5, width = 44)
hc_mean_cells = hclust(dist(t(wcorsSig), method = "euclidean"), method = "complete")
hc_mean_cells_groups = cutreeDynamic(
hc_mean_cells,
minClusterSize = 10,
method = "tree",
deepSplit = FALSE,
useMedoids = FALSE
)
kk_cells = length(unique(hc_mean_cells_groups))-1
kk_cells
## [1] 8
hc_mean_cells_groups = cutree(hc_mean_cells, kk_cells)
plot(hc_mean_cells)
plot(hc_mean_cells, labels = hc_mean_cells_groups)
hc_mean_cells_fac = factor(hc_mean_cells_groups, levels = unique(hc_mean_cells_groups[hc_mean_cells$order]))
df_hc_mean_cells = data.frame(
x = coords[,1],
y = coords[,2],
hc_mean_cells = hc_mean_cells_fac
)
s = 1
g_cells = ggplot(df_hc_mean_cells, aes(x = -x, y = y)) +
geom_point(data = df_hc_mean_cells[,1:2], alpha = 0.2, stroke = 0, size = s, pch = 16) +
geom_point(stroke = 0, size = s, pch = 16) +
facet_grid(hc_mean_cells~.) +
theme_minimal() +
theme(panel.grid = element_blank(),
axis.text = element_blank()) +
xlab("") +
ylab("") +
coord_fixed() +
NULL
g_cells
ggsave(g_cells, file = "output/split_heatmap_cells.pdf",height = 7, width = 1.5)
hc_mean_genepairs = hc
hc_mean_genepairs_groups = genepairsClust
plot(hc_mean_genepairs)
plot(hc_mean_genepairs, labels = hc_mean_genepairs_groups)
hc_mean_genepairs_fac = factor(hc_mean_genepairs_groups, levels = unique(hc_mean_genepairs_groups[hc_mean_genepairs$order]))
split(names(hc_mean_genepairs_fac), hc_mean_genepairs_fac)
## $`5`
## [1] "Actb_Gucy1b3" "Bai1_Gucy1b3" "Calm1_Gucy1b3" "Calm1_Vdac3"
## [5] "Cldn11_Gucy1b3" "Cldn11_Tuba1a" "Cldn11_Vdac3" "Cnp_Prune2"
## [9] "Cst3_Gucy1b3" "Dnm1l_Sp8" "Gucy1b3_Mlc1" "Gucy1b3_Tuba1a"
## [13] "Impact_Tiam1" "Itm2c_Pmm1" "Itm2c_Prrc2b" "Mlc1_Prune2"
## [17] "Prune2_Tiam1" "Sp8_Uchl1" "Tuba1a_Vdac3"
##
## $`7`
## [1] "Actb_Tiam1" "Actb_Vdac3" "Atp6v1c1_Calm1" "Atp6v1c1_Tmsb4x"
## [5] "Calm1_Plekha1" "Calm1_Wdfy3" "Cldn11_Cnp" "Cldn11_Scd2"
## [9] "Cnp_Itm2c" "Cnp_Tmsb4x" "Cst3_Luzp2" "Dnm1l_Plekha1"
## [13] "Dnm1l_Vdac3" "Gm15421_Itm2c" "Gm15421_Tmsb4x" "Gucy1b3_Ildr2"
## [17] "Gucy1b3_Tiam1" "Gucy1b3_Tmsb4x" "Ildr2_Itm2c" "Ildr2_Tiam1"
## [21] "Ildr2_Tuba1a" "Itm2c_Tiam1" "Itm2c_Vdac3" "Mlc1_Slc1a3"
## [25] "Mtor_Wdfy3" "Plekha1_Tmsb4x" "Plekha1_Vdac3" "Prrc2b_Vdac3"
## [29] "Smim13_Vdac3" "Tiam1_Tmsb4x" "Tmsb4x_Tuba1a" "Tmsb4x_Vdac3"
## [33] "Vdac3_Wdfy3"
##
## $`6`
## [1] "Actb_Itm2c" "Actb_Smim13" "Actb_Sp8" "Atp6v1c1_Cst3"
## [5] "Atp6v1c1_Luzp2" "Atp6v1c1_Mlc1" "Atp6v1c1_Sp8" "Calm1_Dnm1l"
## [9] "Calm1_Smim13" "Calm1_Sp8" "Cst3_Smim13" "Gucy1b3_Smim13"
## [13] "Itm2c_Smim13" "Itm2c_Sp8" "Itm2c_Tuba1a" "Prrc2b_Sp8"
## [17] "Prune2_Tuba1a" "Scd2_Smim13" "Scd2_Sp8" "Smim13_Tmsb4x"
## [21] "Smim13_Tuba1a" "Sp8_Tmsb4x" "Sp8_Tuba1a" "Sp8_Vdac3"
##
## $`4`
## [1] "Abat_Tiam1" "Atp6v1c1_Plekha1" "Atp6v1c1_Tiam1" "Atrnl1_Ccng1"
## [5] "Bai1_Tiam1" "Calm1_Tiam1" "Cldn11_Ildr2" "Gucy1b3_Plekha1"
## [9] "Igf2_Pmm1" "Ildr2_Plekha1" "Impact_Ntng1" "Impact_Plekha1"
## [13] "Plekha1_Tiam1" "Prrc2b_Tiam1" "Tiam1_Tuba1a" "Tiam1_Vdac3"
##
## $`1`
## [1] "Abat_Arrb1" "Actb_Myo10" "Arrb1_Cnp" "Arrb1_Dnm3"
## [5] "Arrb1_Mtor" "Arrb1_Prune2" "Arrb1_Smim13" "Arrb1_Sp8"
## [9] "Arrb1_Tmem47" "Arrb1_Uchl1" "Atp6v1c1_Myo10" "Atrnl1_Myo10"
## [13] "Cst3_Myo10" "Dnm1l_Fam63b" "Gucy1b3_Tmem47" "Luzp2_Uchl1"
## [17] "Mlc1_Mtor" "Myo10_Uchl1"
##
## $`2`
## [1] "Abat_Cst3" "Abat_Mlc1" "Abat_Slc1a3" "Arrb1_Cst3"
## [5] "Bai1_Cst3" "Bai1_Slc1a3" "Calm1_Cst3" "Cldn11_Cst3"
## [9] "Cldn11_Itm2c" "Cst3_Impact" "Cst3_Itm2c" "Cst3_Mtor"
## [13] "Cst3_Prrc2b" "Cst3_Prune2" "Cst3_Sp8" "Cst3_Tuba1a"
## [17] "Cst3_Uchl1" "Cst3_Wdfy3" "Dnm1l_Dnm3" "Gucy1b3_Uchl1"
## [21] "Ildr2_Mlc1" "Impact_Mlc1" "Impact_Tmem47" "Itm2c_Uchl1"
## [25] "Mlc1_Uchl1" "Mtor_Slc1a3" "Mtor_Tmem47" "Mtor_Vdac3"
## [29] "Myo10_Slc1a3" "Ntng1_Utrn" "Prune2_Wdfy3" "Smim13_Sp8"
## [33] "Smim13_Tmem47" "Tuba1a_Uchl1" "Uchl1_Vdac3"
##
## $`8`
## [1] "Bai1_Ccng1" "Ccng1_Smim13" "Dnm3_Fam63b" "Gucy1b3_Myo10"
## [5] "Impact_Myo10" "Myo10_Sp8" "Myo10_Tiam1" "Ntng1_Tiam1"
##
## $`9`
## [1] "Ccng1_Cldn11" "Ccng1_Erbb2ip" "Ccng1_Scd2" "Igf2_Mtor"
##
## $`3`
## [1] "Abat_Myo10" "Atrnl1_Gm15421" "Dnm3_Tiam1" "Gm15421_Ildr2"
## [5] "Itm2c_Myo10" "Mlc1_Myo10" "Mtor_Myo10" "Myo10_Prune2"
## [9] "Myo10_Scd2" "Tiam1_Wdfy3"
write.table(sort_df(data.frame(grouping = hc_mean_genepairs_fac, cluster = names(hc_mean_genepairs_fac)), "grouping"), file = "output/hc_mean_genepairs_fac_split.txt", col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t")
hc_mean_meanwcorsSig = apply(wcorsSig, 2, function(x)
tapply(x, hc_mean_genepairs_fac, mean)
)
df_hc_mean_genepairs = data.frame(
x = rep(coords[,1],times = nrow(hc_mean_meanwcorsSig)),
y = rep(coords[,2],times = nrow(hc_mean_meanwcorsSig)),
hc_mean_genepairs = factor(rep(levels(hc_mean_genepairs_fac), each = ncol(meanwcorsSig)),
levels = levels(hc_mean_genepairs_fac)),
hc_mean_genepairs_wc = c(t(hc_mean_meanwcorsSig))
)
g_mean = ggplot(df_hc_mean_genepairs, aes(x = -x, y = y, colour = hc_mean_genepairs_wc)) +
geom_point(size = 0.7) +
facet_grid(~hc_mean_genepairs) +
theme_minimal() +
theme(panel.grid = element_blank(),
axis.text = element_blank()) +
scale_colour_gradient2(low = "blue", mid = "white", high = "red", limits = c(-1,1)) +
theme(legend.position = "none") +
xlab("") +
ylab("") +
coord_fixed() +
NULL
g_mean
cowplot::ggsave2(g_mean, file = "output/split_heatmap_genepairs.pdf", height = 1.3, width = 9)
pdf("output/split_heatmap.pdf",height = 10, width = 15)
tmat = t(wcorsSig)
colnames(tmat) <- gsub("cluster_","", colnames(tmat))
h = ComplexHeatmap::Heatmap(tmat,
cluster_columns = hc_mean_genepairs,
cluster_rows = hc_mean_cells,
row_dend_reorder = FALSE,
column_dend_reorder = FALSE,
row_split = kk_cells,
column_split = kk,
show_heatmap_legend = FALSE,
show_column_names = FALSE
)
print(h)
dev.off()
## quartz_off_screen
## 2
grob = grid.grabExpr(draw(h))
tmatmean = t(apply(tmat, 1, function(x) tapply(x, hc_mean_genepairs_fac, mean)))
hh = ComplexHeatmap::Heatmap(tmatmean, cluster_columns = FALSE,
cluster_rows = hc_mean_cells,
row_dend_reorder = FALSE,
column_dend_reorder = FALSE,
row_split = kk_cells,
show_heatmap_legend = FALSE,
show_column_names = FALSE,
column_title = "Gene pair clusters",
row_title = "Cells",
row_title_gp = gpar(fontsize = 20),
column_title_gp = gpar(fontsize = 20),
border = "black"
)
hh
grobh = grid.grabExpr(draw(hh))
pdf("output/split_heatmap_combined.pdf",height = 7, width = 11, useDingbats = FALSE)
cowplot::plot_grid(grob,
g_cells + theme(plot.margin = margin(2,0.5,0.2,-0.5,unit = "cm")) +
theme(strip.text = element_blank()),
g_mean + theme(plot.margin = margin(0,0,0,1.5, unit = "cm")) +
theme(strip.text = element_blank()) +
NULL
,
ncol = 2,
rel_heights = c(7,1),
rel_widths = c(9,1))
dev.off()
## quartz_off_screen
## 2
pdf("output/split_heatmap_combined_summarised.pdf",height = 8, width = 9, useDingbats = FALSE)
cowplot::plot_grid(grobh,
g_cells +
theme(plot.margin = margin(1.3,0.5,-0.2,-0.5,unit = "cm")) +
theme(strip.text = element_blank()),
g_mean +
theme(plot.margin = margin(0,0,0,1.5, unit = "cm")) +
theme(strip.text = element_blank()),
ncol = 2,
rel_heights = c(8,1),
rel_widths = c(8,1))
dev.off()
## quartz_off_screen
## 2
if (!file.exists("output/GO_list.Rds")) {
library(GO.db)
library(org.Mm.eg.db)
keys = keys(org.Mm.eg.db)
columns(org.Mm.eg.db)
GO_info = select(org.Mm.eg.db, keys=keys, columns = c("SYMBOL", "GO"))
keep = GO_info$SYMBOL %in% rownames(counts)
table(keep)
GO_info_filt = GO_info[keep,]
# at least 10 genes in the term and less than 500
# allTerms = names(which(table(GO_info_filt$GO) >= 10 & table(GO_info_filt$GO) <= 500))
sizeTerms = names(which(table(GO_info_filt$GO) >= 10 & table(GO_info_filt$GO) <= 500))
# also have at least one of the genes which we actually test
testTerms = subset(GO_info_filt, SYMBOL %in% intersect(HVG, nonDEgenes))$GO
allTerms = intersect(sizeTerms, testTerms)
GO_info_terms = select(GO.db, columns = columns(GO.db), keys = allTerms)
rownames(GO_info_terms) <- GO_info_terms$GOID
allTermNames = GO_info_terms[allTerms, "TERM"]
names(allTermNames) <- allTerms
GO_list = sapply(allTerms, function(term) {
print(term)
genes = GO_info_filt[GO_info_filt$GO == term, "SYMBOL"]
return(sort(unique(genes[!is.na(genes)])))
}, simplify = FALSE)
names(GO_list) <- allTermNames
saveRDS(GO_list, file = "output/GO_list.Rds")
} else {
GO_list = readRDS("output/GO_list.Rds")
}
if (!file.exists("output/superclusterstotest_GO.Rds")) {
superclusterstotest_GO = lapply(geneClustMembers, function(set) {
print("testing..")
genesetGOtest(set, rownames(counts), GO_list)
}
)
saveRDS(superclusterstotest_GO, file = "output/superclusterstotest_GO.Rds")
} else {
superclusterstotest_GO <- readRDS("output/superclusterstotest_GO.Rds")
}
gList = lapply(superclusterstotest_GO, function(pval) {
df = data.frame(term = factor(names(pval), levels = c(names(pval), "")),
pval = pval,
qval = p.adjust(pval, method = "BH"))
df$label = df$term
df$label[pval != 1] <- ""
df_sorted = sort_df(df, "pval")[1:10,]
df_sorted$term = factor(df_sorted$term, levels = rev(df_sorted$term))
g = ggplot(df_sorted, aes(x = term, y = -log10(pval), fill = qval < 0.05)) +
theme_classic() +
geom_col() +
coord_flip() +
xlab("") +
ylab("-log10(P-value)") +
# geom_hline(yintercept = -log10(0.01), colour = "red", linetype = "dashed", size = 1) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
scale_fill_manual(values = c("TRUE" = "dimgrey", "FALSE" = "peachpuff")) +
theme(legend.position = "none") +
NULL
return(g)
})
gTogether = sapply(seq_len(length(gList)), function(i){
g_1 = gList[[i]] +
theme(axis.text = element_text(size = 12))
n = ggtitle(paste0("Cluster ",i))
g_2 = gList_cells[[i]] + theme(title = element_text(size = 20)) +
coord_fixed()
pdf(paste0("output/go_cluster_plot_cluster_",i,".pdf"),
height = 7, width = 4.5)
scater::multiplot(g_2 + n, g_1 + ylab(""), layout = matrix(c(1,1,2,2,2), ncol = 1))
dev.off()
return(g_1 + g_2)
}, simplify = FALSE)
Using script run on cluster due to high computational demand. Note that the file span_output.RData is around 60MB.
if (!file.exists("output/span_output.RData")) {
source("span_MOB.R")
}
load("output/span_output.RData")
span_p_all_all = do.call(rbind, span_p_all)
span_p_all_all$testing <- rep(names(span_p_all),
times = unlist(lapply(span_p_all, nrow)))
span_pvals = do.call(cbind,lapply(span_p_all, "[", "pval"))
colnames(span_pvals) <- names(span_p_all)
span_cor = cor(-log10(span_pvals), method = "spearman")
g = ggpairs(-log10(span_pvals),
lower = list(continuous = wrap("points", alpha = 0.3, size=0.1),
combo = wrap("dot", alpha = 0.4, size=0.2)),
title = "-log10(P-value)"
)
g
ggsave(g, file = "output/span_pairs.pdf", height = 12, width = 14)
pdf("output/span_corrplot.pdf", height = 10, width = 10)
g_cor = corrplot(span_cor, order = "original")
dev.off()
## quartz_off_screen
## 2
pdf("output/span_upset.pdf", height = 10, width = 16, onefile = FALSE)
upset(data = as.data.frame(as.matrix(1*(apply(span_pvals,2,p.adjust, method = "BH") < 0.2))),
sets = colnames(span_pvals),
order.by = "freq",
text.scale = 2)
dev.off()
## quartz_off_screen
## 2
# distribution graphs
span_distn = data.frame(
quantile_999 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.999)))),
quantile_90 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.90)))),
quantile_95 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.95)))),
quantile_99 = unlist(lapply(span_perms, function(x) unlist(lapply(x, quantile, probs = 0.99)))),
global = unlist(lapply(span_global, "[", span_sampled_ind)),
span = rep(names(span_perms), times = unlist(lapply(span_perms, length)))
)
span_distn <- reshape::sort_df(span_distn, "global")
span_distn$quantile_999_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_999 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$quantile_90_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_90 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$quantile_95_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_95 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$quantile_99_fitted = unsplit(lapply(split.data.frame(span_distn, span_distn$span),
function(df){
loess(quantile_99 ~ global, data = df)$fitted
}), span_distn$span)
span_distn$span = factor(span_distn$span,levels = names(span_perms))
span_nicecolours = RColorBrewer::brewer.pal(name = "Blues", 9)
names(span_nicecolours) <- c(rep("", 5), "0.90","0.95","0.99","0.999")
g = ggplot(span_distn, aes(x = global)) +
geom_point(aes(y = quantile_999, colour = "0.999"),
alpha = 0.5) +#, colour = span_nicecolours[9]) +
geom_line(aes(y = quantile_999_fitted), colour = span_nicecolours[9]) +
geom_point(aes(y = quantile_99, colour = "0.99"),
alpha = 0.5) + #, colour = span_nicecolours[8]) +
geom_line(aes(y = quantile_99_fitted), colour = span_nicecolours[8]) +
geom_point(aes(y = quantile_95, colour = "0.95"),
alpha = 0.5) + #, colour = span_nicecolours[7]) +
geom_line(aes(y = quantile_95_fitted), colour = span_nicecolours[7]) +
geom_point(aes(y = quantile_90, colour = "0.90"),
alpha = 0.5) + #, colour = span_nicecolours[6]) +
geom_line(aes(y = quantile_90_fitted), colour = span_nicecolours[6]) +
facet_wrap(~span, scales = "free", ncol = 4) +
theme_classic() +
xlab("Global higher order statistic value") +
ylab("Permuted scHOT test statistic quantile") +
scale_colour_manual(name="Quantiles",values=span_nicecolours[6:9]) +
theme(legend.position = "bottom") +
NULL
g
ggsave(g, file = "output/span_pvalEstimation.pdf",
height = 12, width = 14)
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin18.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.6_1/lib/libopenblasp-r0.3.6.dylib
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] UpSetR_1.4.0 corrplot_0.84
## [3] GGally_1.4.0 cowplot_1.0.0
## [5] ggpubr_0.2.5 magrittr_1.5
## [7] patchwork_1.0.0 ggforce_0.3.1
## [9] stringr_1.4.0 org.Mm.eg.db_3.10.0
## [11] GO.db_3.10.0 AnnotationDbi_1.48.0
## [13] ComplexHeatmap_2.2.0 dynamicTreeCut_1.63-1
## [15] scattermore_0.4 reshape_0.8.8
## [17] scran_1.14.6 scater_1.14.6
## [19] SingleCellExperiment_1.8.0 SummarizedExperiment_1.16.1
## [21] DelayedArray_0.12.2 BiocParallel_1.20.1
## [23] matrixStats_0.55.0 Biobase_2.46.0
## [25] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
## [27] IRanges_2.20.2 S4Vectors_0.24.3
## [29] BiocGenerics_0.32.0 gplots_3.0.1.2
## [31] ggplot2_3.2.1 DCARS_0.3.5
##
## loaded via a namespace (and not attached):
## [1] ggbeeswarm_0.6.0 colorspace_1.4-1 deldir_0.1-25
## [4] ggsignif_0.6.0 rjson_0.2.20 circlize_0.4.8
## [7] XVector_0.26.0 GlobalOptions_0.1.1 BiocNeighbors_1.4.1
## [10] clue_0.3-57 farver_2.0.3 bit64_0.9-7
## [13] codetools_0.2-16 knitr_1.28 polyclip_1.10-0
## [16] cluster_2.1.0 png_0.1-7 compiler_3.6.1
## [19] dqrng_0.2.1 assertthat_0.2.1 Matrix_1.2-18
## [22] lazyeval_0.2.2 limma_3.42.2 tweenr_1.0.1
## [25] BiocSingular_1.2.2 htmltools_0.4.0 tools_3.6.1
## [28] rsvd_1.0.3 igraph_1.2.4.2 gtable_0.3.0
## [31] glue_1.3.1 GenomeInfoDbData_1.2.2 reshape2_1.4.3
## [34] dplyr_0.8.4 Rcpp_1.0.3 vctrs_0.2.2
## [37] gdata_2.18.0 DelayedMatrixStats_1.8.0 xfun_0.12
## [40] lifecycle_0.1.0 irlba_2.3.3 gtools_3.8.1
## [43] statmod_1.4.34 edgeR_3.28.0 zlibbioc_1.32.0
## [46] MASS_7.3-51.5 scales_1.1.0 RColorBrewer_1.1-2
## [49] yaml_2.2.1 memoise_1.1.0 gridExtra_2.3
## [52] stringi_1.4.6 RSQLite_2.2.0 caTools_1.18.0
## [55] shape_1.4.4 rlang_0.4.4 pkgconfig_2.0.3
## [58] bitops_1.0-6 evaluate_0.14 lattice_0.20-40
## [61] purrr_0.3.3 labeling_0.3 bit_1.1-15.2
## [64] tidyselect_1.0.0 plyr_1.8.5 R6_2.4.1
## [67] DBI_1.1.0 pillar_1.4.3 withr_2.1.2
## [70] RCurl_1.98-1.1 tibble_2.1.3 crayon_1.3.4
## [73] KernSmooth_2.23-16 rmarkdown_2.1 viridis_0.5.1
## [76] GetoptLong_0.1.8 locfit_1.5-9.1 blob_1.2.1
## [79] digest_0.6.24 munsell_0.5.0 beeswarm_0.2.3
## [82] viridisLite_0.3.0 vipor_0.4.5